#R4.0

rm(list=ls())
rewrite=FALSE

condaENV <- "/home/chenzh/miniconda3/envs/R4.0"
LBpath <- paste0(condaENV ,"/lib/R/library")
.libPaths(LBpath)

suppressMessages(library(dplyr))
suppressMessages(library(tidyr))
suppressMessages(library(ggplot2))
suppressMessages(library(scran))
suppressMessages(library(batchelor))
suppressMessages(library(Seurat))
suppressMessages(library(harmony))

#source("~/PC/SnkM/SgCell.R")
rename <- dplyr::rename


# working directory
DIR <- "~/My_project/CheckBlastoids"
setwd(DIR)
knitr::opts_knit$set(root.dir=DIR)
source("src/local.quick.fun.R")

options(digits = 4)
options(future.globals.maxSize= 3001289600)
TD="Nov14_2021"
MM="Pub"

nGene <- 2000;pc <- 25

rename <- dplyr::rename
select<- dplyr::select
filter <- dplyr::filter

loading data

load("tmp_data/gene.meta.Rdata",verbose=T)
## Loading objects:
##   gtf.anno
##   mt.gene
##   ribo.gene
counts.filter <- readRDS(paste0("tmp_data/",TD,"/counts.filter.rds"))

meta.filter<- readRDS(paste0("tmp_data/",TD,"/meta.filter.rds")) %>% filter(pj %in% c("EBD2","IBD2","JPF2019","D3post","CS7","SPH2016","Blakeley","nBGuo","nicolBla") ) %>% mutate(EML=ifelse(EML %in% c("Ectoderm"),"Amnion",EML)) %>% filter(pj %in% c("CS7","EBD2","IBD2","nBGuo","nicolBla"))

loading NHP dataset

load(paste0("tmp_data/",TD,"/NHP.Rdata"),verbose=T)
## Loading objects:
##   GI.list
##   NHP.counts.mod
##   NHP.meta

check the overlapped genes

GI.list$ov %in% rownames(counts.filter) %>% table()
## .
##  TRUE 
## 14846
GI.list$ov %in% rownames(NHP.counts.mod) %>% table()
## .
##  TRUE 
## 14846
# 14846 are overlapped

combine human+monkey, for NHP dataset only choose the D14

meta <- meta.filter %>% bind_rows(NHP.meta %>% mutate(seqType="10X", cellType="EM") %>% filter(devTime=="E14"))
counts <- (counts.filter[GI.list$ov,] %>% cbind(NHP.counts.mod[GI.list$ov,]))[,meta$cell]

rename EML annotation

meta <- meta %>% mutate(rename_EML=EML) %>% mutate(rename_EML=ifelse(rename_EML %in% "Trans" & pj=="NHP","EPI-AM-trans",rename_EML))%>% mutate(rename_EML=ifelse(rename_EML%in% c("AdvMes","AxMes","EmMes","NasMes","Mes1","Mes2"),"Mesoderm",rename_EML)) %>% mutate(rename_EML=ifelse(rename_EML%in% c("CTB","EVT","STB","TE","EarlyTE"),"TE",rename_EML)) %>% mutate(rename_EML=ifelse(rename_EML%in% c("MeLC2","MeLC1"),"MeLC",rename_EML)) %>% mutate(rename_EML=ifelse(rename_EML%in% c("Tsw-AMLC","AMLC"),"AMLC",rename_EML)) %>% mutate(rename_EML=ifelse(rename_EML%in% c("PrE"),"Endoderm",rename_EML))  %>% mutate(rename_EML=ifelse(rename_EML %in% c("EB_ELC","IB_Epi","nicolBla_ELC"),"ELC",rename_EML)) %>% mutate(rename_EML=ifelse(rename_EML %in% c("EB_HLC","IB_PE","nicolBla_HLC"),"HLC",rename_EML))%>% mutate(rename_EML=ifelse(rename_EML %in% c("EB_TLC","IB_TE","nicolBla_TLC"),"TLC",rename_EML))  %>% mutate(rename_EML=ifelse(rename_EML%in% c("EB_U10","EB_U5","EB_U6","EB_UE7","EB_UI13","IB_IM1","IB_IM2","IB_IM3","IB_uc","IB_NR","Trans","nicolBla_nc"),"Undef",rename_EML)) %>% mutate(rename_EML=ifelse(rename_EML %in% "Amnion1","E-Amnion",rename_EML)) %>% mutate(rename_EML=ifelse(rename_EML %in% "Amnion2","Amnion",rename_EML)) %>% mutate(rename_EML=ifelse(rename_EML %in% "EPI","Epiblast",rename_EML))



expG.set <- list()
for (b in unique(meta$pj  %>% unique() %>% as.vector())) { 
  temp.cell <- meta %>% filter(pj==b) %>% pull(cell)
  expG.set[[b]] <- rownames(counts )[rowSums(counts[,temp.cell] >=1) >=5]
}
sel.expG <-unlist(expG.set) %>% unique() %>% as.vector()

if (file.exists(file=paste0("tmp_data/",TD,"/NHP.IT.umap.rds")) & !rewrite) {
  data.umap.list <- readRDS(paste0("tmp_data/",TD,"/NHP.IT.umap.rds"))
}else{
  
  data.umap.list <- list()
  for (s in c("CS7","EBD2","IBD2","nBGuo","nicolBla")) {
    print(paste0("now it is ",s ))
    
    # run scran normalization followed by multiBatchNorm  ( it doesn't effect the integration results can be removed)
    # sce.ob <- list()
    # for (b in c("NHP",s)) {
    #   print(b)
    #   temp.M <- meta %>% filter(pj==b)
    #   temp.sce <-  SingleCellExperiment(list(counts=as.matrix(counts[sel.expG,temp.M$cell])),colData=(temp.M %>% tibble::column_to_rownames("cell"))) %>% computeSumFactors()
    #   sce.ob[[b]] <- temp.sce
    # }
    # mBN.sce.ob <- multiBatchNorm(sce.ob[[1]],sce.ob[[2]])
    # lognormExp.mBN<- mBN.sce.ob %>% lapply(function(x) {logcounts(x) %>% as.data.frame()  %>% return()}) %>% do.call("bind_cols",.)

    #' prepare for integration
    temp.M <-  meta %>% filter(pj=="NHP") %>% bind_rows(meta %>% filter(pj==s))
    temp.sel.cell <- temp.M$cell
    data.merge <- CreateSeuratObject(counts[sel.expG,temp.sel.cell], meta.data = (temp.M %>% tibble::column_to_rownames("cell"))) %>% NormalizeData(verbose = FALSE) 
    #data.merge@assays$RNA@data <- as.matrix(lognormExp.mBN[rownames(lognormExp.mBN),colnames(data.merge)]) 
    data.spt <- SplitObject(data.merge, split.by = "pj")%>% lapply(function(x){x=FindVariableFeatures(x,verbose=F,nfeatures=2000)})
    
    #' fastmnn  
    data.spt <- data.spt;set.seed(123)
    data.temp <- SeuratWrappers::RunFastMNN(data.spt,verbose=F,features=nGene) %>% RunUMAP( reduction = "mnn", dims = 1:pc,verbose=F) 
    saveRDS(data.temp,paste0("tmp_data/",TD,"/NHP.",s,".mnn.rds"))
    data.umap.list[[paste0("NHP.",s,".mnn")]]<- data.temp@meta.data %>% as.data.frame() %>% tibble::rownames_to_column("cell") %>% tbl_df() %>% select(c(cell,SID:rename_EML)) %>% inner_join(data.temp@reductions$umap@cell.embeddings %>% as.data.frame() %>% tibble::rownames_to_column("cell") %>% tbl_df(),by="cell") 
    
    #' seurat CCA
    sel.features <- SelectIntegrationFeatures(object.list = data.spt, nfeatures = nGene,verbose=F)
    data.temp <- FindIntegrationAnchors(object.list = data.spt, dims = 1:pc,anchor.features=sel.features,verbose=F) %>% IntegrateData(dims = 1:pc,verbose=F) %>% ScaleData(verbose = FALSE) %>% RunPCA(npcs = pc, verbose = FALSE) %>%  RunUMAP( reduction = "pca", dims = 1:pc,verbose=F) ### 
    saveRDS(data.temp,paste0("tmp_data/",TD,"/NHP.",s,".cca.rds"))
    data.umap.list[[paste0("NHP.",s,".cca")]]<- data.temp@meta.data %>% as.data.frame() %>% tibble::rownames_to_column("cell") %>% tbl_df() %>% select(c(cell,SID:rename_EML)) %>% inner_join(data.temp@reductions$umap@cell.embeddings %>% as.data.frame() %>% tibble::rownames_to_column("cell") %>% tbl_df(),by="cell") 
    
    #' harmony
    data.temp <- data.merge  %>% FindVariableFeatures(selection.method = "vst", nfeatures = nGene, verbose = FALSE) %>% ScaleData(verbose = FALSE) %>% RunPCA( npcs = pc, verbose = FALSE) %>%  RunHarmony("pj", plot_convergence = TRUE, verbose = FALSE) %>%  RunUMAP(reduction = "harmony", dims = 1:pc, verbose = FALSE)
    saveRDS(data.temp,paste0("tmp_data/",TD,"/NHP.",s,".harmony.rds"))
    data.umap.list[[paste0("NHP.",s,".harmony")]]<- data.temp@meta.data %>% as.data.frame() %>% tibble::rownames_to_column("cell") %>% tbl_df() %>% select(c(cell,SID:rename_EML)) %>% inner_join(data.temp@reductions$umap@cell.embeddings %>% as.data.frame() %>% tibble::rownames_to_column("cell") %>% tbl_df(),by="cell") 
    
  }
 
  saveRDS(data.umap.list,file=paste0("tmp_data/",TD,"/NHP.IT.umap.rds"))
}

the integration results

for (m in c("mnn","cca","harmony")) {
  temp.plot <- list()
  for (s in c("CS7","EBD2","IBD2","nBGuo","nicolBla")) {
    data.temp <- readRDS(paste0("tmp_data/",TD,"/NHP.",s,".",m,".rds"))
    temp.plot[[paste(s,"all")]] <- DimPlot(data.temp,group.by="EML",label=T)+NoAxes()+NoLegend()+ggtitle(paste0("NHP + ", s))+theme(plot.title = element_text(hjust=0.5,face="bold"))
    temp.plot[[paste(s,"split")]] <- DimPlot(data.temp,group.by="EML",split.by="pj",label=T)+NoAxes()+NoLegend()+ggtitle("")+theme(plot.title = element_text(hjust=0.5,face="bold"))
  }
  print(
    cowplot::plot_grid(plotlist=temp.plot,nrow=5,rel_widths=c(1:2))
  )
}